{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "############################  糖尿病发病预测 — 特征工程  ############################\n",
    "# 工具包导入\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sn\n",
    "import matplotlib as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pregnancies</th>\n",
       "      <th>Glucose</th>\n",
       "      <th>BloodPressure</th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>Insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>DiabetesPedigreeFunction</th>\n",
       "      <th>Age</th>\n",
       "      <th>Outcome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>148</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>0.627</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>85</td>\n",
       "      <td>66</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>0.351</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>183</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>0.672</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "      <td>66</td>\n",
       "      <td>23</td>\n",
       "      <td>94</td>\n",
       "      <td>28.1</td>\n",
       "      <td>0.167</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>137</td>\n",
       "      <td>40</td>\n",
       "      <td>35</td>\n",
       "      <td>168</td>\n",
       "      <td>43.1</td>\n",
       "      <td>2.288</td>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>116</td>\n",
       "      <td>74</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>25.6</td>\n",
       "      <td>0.201</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3</td>\n",
       "      <td>78</td>\n",
       "      <td>50</td>\n",
       "      <td>32</td>\n",
       "      <td>88</td>\n",
       "      <td>31.0</td>\n",
       "      <td>0.248</td>\n",
       "      <td>26</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>10</td>\n",
       "      <td>115</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>35.3</td>\n",
       "      <td>0.134</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2</td>\n",
       "      <td>197</td>\n",
       "      <td>70</td>\n",
       "      <td>45</td>\n",
       "      <td>543</td>\n",
       "      <td>30.5</td>\n",
       "      <td>0.158</td>\n",
       "      <td>53</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>8</td>\n",
       "      <td>125</td>\n",
       "      <td>96</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.232</td>\n",
       "      <td>54</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \\\n",
       "0            6      148             72             35        0  33.6   \n",
       "1            1       85             66             29        0  26.6   \n",
       "2            8      183             64              0        0  23.3   \n",
       "3            1       89             66             23       94  28.1   \n",
       "4            0      137             40             35      168  43.1   \n",
       "5            5      116             74              0        0  25.6   \n",
       "6            3       78             50             32       88  31.0   \n",
       "7           10      115              0              0        0  35.3   \n",
       "8            2      197             70             45      543  30.5   \n",
       "9            8      125             96              0        0   0.0   \n",
       "\n",
       "   DiabetesPedigreeFunction  Age  Outcome  \n",
       "0                     0.627   50        1  \n",
       "1                     0.351   31        0  \n",
       "2                     0.672   32        1  \n",
       "3                     0.167   21        0  \n",
       "4                     2.288   33        1  \n",
       "5                     0.201   30        0  \n",
       "6                     0.248   26        1  \n",
       "7                     0.134   29        0  \n",
       "8                     0.158   53        1  \n",
       "9                     0.232   54        1  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 工程数据导入\n",
    "trainDatas = pd.read_csv(r'C:\\Users\\HuangSX\\Desktop\\logistic\\pima-indians-diabetes .csv')\n",
    "# 显示数据前5行内容\n",
    "\"\"\"\n",
    "    Pregnancies - 怀孕（单位：次数）\n",
    "    Glucose - 血浆葡萄糖浓度\n",
    "    BloodPressure - 血压（单位：mm Hg）\n",
    "    SkinThickness - 三头肌皮褶厚度（单位：mm）\n",
    "    Insulin - 餐后血清胰岛素（单位：mm）\n",
    "    BMI - 体重指数（体重（公斤）/ 身高（米）^2）\n",
    "    DiabetesPedigreeFunction - 糖尿病家系作用\n",
    "    Age - 年龄\n",
    "\"\"\"\n",
    "trainDatas.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 768 entries, 0 to 767\n",
      "Data columns (total 9 columns):\n",
      " #   Column                    Non-Null Count  Dtype  \n",
      "---  ------                    --------------  -----  \n",
      " 0   Pregnancies               768 non-null    int64  \n",
      " 1   Glucose                   768 non-null    int64  \n",
      " 2   BloodPressure             768 non-null    int64  \n",
      " 3   SkinThickness             768 non-null    int64  \n",
      " 4   Insulin                   768 non-null    int64  \n",
      " 5   BMI                       768 non-null    float64\n",
      " 6   DiabetesPedigreeFunction  768 non-null    float64\n",
      " 7   Age                       768 non-null    int64  \n",
      " 8   Outcome                   768 non-null    int64  \n",
      "dtypes: float64(2), int64(7)\n",
      "memory usage: 54.1 KB\n"
     ]
    }
   ],
   "source": [
    "# 显示字段的基本信息：数据是否缺失，数据类型等\n",
    "trainDatas.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pregnancies</th>\n",
       "      <th>Glucose</th>\n",
       "      <th>BloodPressure</th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>Insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>DiabetesPedigreeFunction</th>\n",
       "      <th>Age</th>\n",
       "      <th>Outcome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>3.845052</td>\n",
       "      <td>120.894531</td>\n",
       "      <td>69.105469</td>\n",
       "      <td>20.536458</td>\n",
       "      <td>79.799479</td>\n",
       "      <td>31.992578</td>\n",
       "      <td>0.471876</td>\n",
       "      <td>33.240885</td>\n",
       "      <td>0.348958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.369578</td>\n",
       "      <td>31.972618</td>\n",
       "      <td>19.355807</td>\n",
       "      <td>15.952218</td>\n",
       "      <td>115.244002</td>\n",
       "      <td>7.884160</td>\n",
       "      <td>0.331329</td>\n",
       "      <td>11.760232</td>\n",
       "      <td>0.476951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.078000</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>62.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>27.300000</td>\n",
       "      <td>0.243750</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>3.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>30.500000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>0.372500</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.000000</td>\n",
       "      <td>140.250000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>127.250000</td>\n",
       "      <td>36.600000</td>\n",
       "      <td>0.626250</td>\n",
       "      <td>41.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>17.000000</td>\n",
       "      <td>199.000000</td>\n",
       "      <td>122.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>846.000000</td>\n",
       "      <td>67.100000</td>\n",
       "      <td>2.420000</td>\n",
       "      <td>81.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin  \\\n",
       "count   768.000000  768.000000     768.000000     768.000000  768.000000   \n",
       "mean      3.845052  120.894531      69.105469      20.536458   79.799479   \n",
       "std       3.369578   31.972618      19.355807      15.952218  115.244002   \n",
       "min       0.000000    0.000000       0.000000       0.000000    0.000000   \n",
       "25%       1.000000   99.000000      62.000000       0.000000    0.000000   \n",
       "50%       3.000000  117.000000      72.000000      23.000000   30.500000   \n",
       "75%       6.000000  140.250000      80.000000      32.000000  127.250000   \n",
       "max      17.000000  199.000000     122.000000      99.000000  846.000000   \n",
       "\n",
       "              BMI  DiabetesPedigreeFunction         Age     Outcome  \n",
       "count  768.000000                768.000000  768.000000  768.000000  \n",
       "mean    31.992578                  0.471876   33.240885    0.348958  \n",
       "std      7.884160                  0.331329   11.760232    0.476951  \n",
       "min      0.000000                  0.078000   21.000000    0.000000  \n",
       "25%     27.300000                  0.243750   24.000000    0.000000  \n",
       "50%     32.000000                  0.372500   29.000000    0.000000  \n",
       "75%     36.600000                  0.626250   41.000000    1.000000  \n",
       "max     67.100000                  2.420000   81.000000    1.000000  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看数据的数值特征：均值、标准差、最大最小值、分位数\n",
    "trainDatas.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pregnancies                   0\n",
       "Glucose                       5\n",
       "BloodPressure                35\n",
       "SkinThickness               227\n",
       "Insulin                     374\n",
       "BMI                          11\n",
       "DiabetesPedigreeFunction      0\n",
       "Age                           0\n",
       "Outcome                       0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 缺失数据统计\n",
    "\"\"\"\n",
    "    在Pandas的DataFrame中，通过replace()函数可以将数据子集的值标记为NaN；\n",
    "    标记完缺失值之后，可以利用isnull()函数将数据集中所有的NaN值标记为True，然后就可以得到每一列中缺失值的数量\n",
    "\"\"\"\n",
    "NaN_Col_Names = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']\n",
    "trainDatas[NaN_Col_Names] = trainDatas[NaN_Col_Names].replace(0, np.NaN)\n",
    "trainDatas.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>SkinThickness_MissFlag</th>\n",
       "      <th>Outcome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>29.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>32.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>45.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SkinThickness  SkinThickness_MissFlag  Outcome\n",
       "0           35.0                       0        1\n",
       "1           29.0                       0        0\n",
       "2            NaN                       1        1\n",
       "3           23.0                       0        0\n",
       "4           35.0                       0        1\n",
       "5            NaN                       1        0\n",
       "6           32.0                       0        1\n",
       "7            NaN                       1        0\n",
       "8           45.0                       0        1\n",
       "9            NaN                       1        1"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 缺失数据处理：如对Outcome无影响，可用中值填充；如有影响，新增字段标记是否缺失数据\n",
    "trainDatas['SkinThickness_MissFlag'] = trainDatas['SkinThickness'].apply(lambda x: 1 if pd.isnull(x) else 0)\n",
    "trainDatas[['SkinThickness','SkinThickness_MissFlag', 'Outcome']].head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x2dc56ae7588>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEHCAYAAABBW1qbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAY60lEQVR4nO3de7QU5Z3u8e8jotscrwh6kI0BFDPK4SJuFSUnGklGZamYHM3AEJVLhoxLJiTROMw4R6OjjjMmsjSOrkVGRTwGgreRcdRRIWh0VALOlquJTFRoJQpovIQosv2dP/rdZYu9Nw3u6m7Yz2etXt311ltVvwZWP1TVW1WKCMzMzAB2qXUBZmZWPxwKZmaWcSiYmVnGoWBmZhmHgpmZZXatdQGfRffu3aNPnz61LsPMbIeyePHi9RHRo9y8HToU+vTpw6JFi2pdhpnZDkXSK23N8+EjMzPLOBTMzCzjUDAzs8wOfU7BzKwjffjhhxQKBd5///1al9IhGhoaaGxspGvXrhUv41AwM0sKhQJ77bUXffr0QVKty/lMIoINGzZQKBTo27dvxcv58JGZWfL++++z//777/CBACCJ/ffff5v3ehwKZmYldoZAaLU938WhYGZmGYeCmVk7CoUCo0aNon///hxyyCFMmTKFTZs2tbvM1VdfXaXqOp525IfsNDU1xWe9ovmoH8zsoGp2fIuvPbfWJZjV1MqVKzn88MOz6Yjg2GOP5fzzz2f8+PG0tLQwadIkunXrxrXXXtvmevbcc0/ee++9apS8VVt+JwBJiyOiqVx/7ymYmbVh/vz5NDQ0MH78eAC6dOnCtGnTuPXWW7npppuYPHly1ve0005jwYIFTJ06lT/+8Y8MGTKEsWPHAjBz5kwGDRrE4MGDOeeccwB45ZVXGDFiBIMGDWLEiBGsXr0agHHjxnH++efz5S9/mX79+vH4448zYcIEDj/8cMaNG5dt75FHHuG4445j6NChnH322R0WQg4FM7M2LF++nKOOOuoTbXvvvTcHH3wwmzdvLrvMNddcwx577EFzczN33nkny5cv56qrrmL+/Pk8//zzXH/99QBMnjyZc889lyVLljB27Fi+853vZOt46623mD9/PtOmTeP000/ne9/7HsuXL2fp0qU0Nzezfv16rrzySh577DGee+45mpqauO666zrkO+d2nYKkBuAJYPe0nbsj4jJJM4ATgLdT13ER0aziafLrgZHAxtT+XF71mZltTUSUHcHTVns58+fP56yzzqJ79+4AdOvWDYCnn36ae++9F4BzzjmHiy++OFvm9NNPRxIDBw7kwAMPZODAgQAMGDCAl19+mUKhwIoVKxg+fDgAmzZt4rjjjtv+L1oiz4vXPgBOioj3JHUFnpT0UJr3g4i4e4v+pwL90+tY4Ob0bmZWEwMGDOCee+75RNs777zDmjVr2Gefffjoo4+y9rauB6g0QEr77L777gDssssu2efW6c2bN9OlSxe++tWvMmvWrG36PpXI7fBRFLUe5OqaXu2d1R4FzEzLPQPsK6lnXvWZmW3NiBEj2LhxIzNnFgektLS0cOGFFzJu3Dj69etHc3MzH330EWvWrGHhwoXZcl27duXDDz/M1jFnzhw2bNgAwJtvvgnA8ccfz+zZswG48847+eIXv1hxXcOGDeOpp55i1apVAGzcuJHf/OY3n/0Lk/M5BUldJDUDbwCPRsSzadZVkpZImiapNQZ7AWtKFi+kti3XOUnSIkmL1q1bl2f5ZtbJSeK+++7jrrvuon///hx22GE0NDRw9dVXM3z4cPr27cvAgQO56KKLGDp0aLbcpEmTGDRoEGPHjmXAgAFccsklnHDCCQwePJjvf//7ANxwww3cdtttDBo0iDvuuCM711CJHj16MGPGDMaMGcOgQYMYNmwYL7zwQsd852oMSZW0L3Af8FfABuB3wG7AdOC/I+IKSf8O/ENEPJmWmQdcHBGL21qvh6R2LA9Jtc6u3PDNHV1dDkmNiN8DC4BTImJtOkT0AXAbcEzqVgB6lyzWCLxWjfrMzKwot1CQ1CPtISBpD+ArwAut5wnSaKMzgWVpkbnAuSoaBrwdEWvzqs/MzD4tz9FHPYHbJXWhGD5zIuIBSfMl9QAENAN/mfo/SHE46iqKQ1LH51ibmZmVkVsoRMQS4Mgy7Se10T+AC/Kqx8zMts5XNJuZWcahYGZmGT+O08xsG3T0MPZKh4I//PDDTJkyhZaWFr71rW8xderUDq2jlfcUzMzqXEtLCxdccAEPPfQQK1asYNasWaxYsSKXbTkUzMzq3MKFCzn00EPp168fu+22G6NHj+b+++/PZVsOBTOzOvfqq6/Su/fH1/Y2Njby6quv5rIth4KZWZ0rdzuiSm/dva0cCmZmda6xsZE1az6+X2ihUOCggw7KZVsOBTOzOnf00Ufz4osv8tJLL7Fp0yZmz57NGWeckcu2PCTVzGwb1OJuwrvuuis33ngjJ598Mi0tLUyYMIEBAwbks61c1mpmZh1q5MiRjBw5Mvft+PCRmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpbxkFQzs22w+oqBHbq+gy9dutU+EyZM4IEHHuCAAw5g2bJlW+3/WXhPwcyszo0bN46HH364KttyKJiZ1bkvfelLdOvWrSrbyi0UJDVIWijpeUnLJV2e2vtKelbSi5J+Lmm31L57ml6V5vfJqzYzMysvzz2FD4CTImIwMAQ4RdIw4B+BaRHRH3gLmJj6TwTeiohDgWmpn5mZVVFuoRBF76XJrukVwEnA3an9duDM9HlUmibNH6G8bhhuZmZl5XpOQVIXSc3AG8CjwH8Dv4+IzalLAeiVPvcC1gCk+W8D+5dZ5yRJiyQtWrduXZ7lm5l1OrkOSY2IFmCIpH2B+4DDy3VL7+X2Cj71uKGImA5MB2hqavr044jMzHJUyRDSjjZmzBgWLFjA+vXraWxs5PLLL2fixIlbX3A7VOU6hYj4vaQFwDBgX0m7pr2BRuC11K0A9AYKknYF9gHerEZ9Zmb1bNasWVXbVp6jj3qkPQQk7QF8BVgJ/AI4K3U7D7g/fZ6bpknz50e5B5OamVlu8txT6AncLqkLxfCZExEPSFoBzJZ0JfBfwC2p/y3AHZJWUdxDGJ1jbWZmVkZuoRARS4Ajy7T/FjimTPv7wNl51WNmVomIYGcZ+Lg9B1t8RbOZWdLQ0MCGDRu268e03kQEGzZsoKGhYZuW8w3xzMySxsZGCoUCO8tw94aGBhobG7dpGYeCmVnStWtX+vbtW+syasqHj8zMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8vkFgqSekv6haSVkpZLmpLafyjpVUnN6TWyZJm/kbRK0q8lnZxXbWZmVl6eT17bDFwYEc9J2gtYLOnRNG9aRPyotLOkI4DRwADgIOAxSYdFREuONZqZWYnc9hQiYm1EPJc+vwusBHq1s8goYHZEfBARLwGrgGPyqs/MzD6tKucUJPUBjgSeTU2TJS2RdKuk/VJbL2BNyWIFyoSIpEmSFklatLM8XNvMrF7kHgqS9gTuAb4bEe8ANwOHAEOAtcCPW7uWWTw+1RAxPSKaIqKpR48eOVVtZtY55RoKkrpSDIQ7I+JegIh4PSJaIuIj4Kd8fIioAPQuWbwReC3P+szM7JPyHH0k4BZgZURcV9Les6Tb14Bl6fNcYLSk3SX1BfoDC/Oqz8zMPi3P0UfDgXOApZKaU9vfAmMkDaF4aOhl4NsAEbFc0hxgBcWRSxd45JGZWXXlFgoR8STlzxM82M4yVwFX5VWTmZm1z1c0m5lZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmaZikJB0rxK2szMbMfW7jOaJTUAnwO6S9qPj5+5vDdwUM61mZlZlW1tT+HbwGLgT9J76+t+4J/bW1BSb0m/kLRS0nJJU1J7N0mPSnoxve+X2iXpBkmrJC2RNPSzfjkzM9s27YZCRFwfEX2BiyKiX0T0Ta/BEXHjVta9GbgwIg4HhgEXSDoCmArMi4j+wLw0DXAq0D+9JgE3b//XMjOz7dHu4aNWEfETSccDfUqXiYiZ7SyzFlibPr8raSXQCxgFnJi63Q4sAP46tc+MiACekbSvpJ5pPWZmVgUVhYKkO4BDgGagJTUH0GYobLF8H+BI4FngwNYf+ohYK+mA1K0XsKZksUJq+0QoSJpEcU+Cgw8+uJLNm5lZhSoKBaAJOCL9L36bSNoTuAf4bkS8I6nNrmXaPrW9iJgOTAdoamra5nrMzKxtlV6nsAz4n9u6ckldKQbCnRFxb2p+XVLPNL8n8EZqLwC9SxZvBF7b1m2amdn2qzQUugMrJP2HpLmtr/YWUHGX4BZgZURcVzJrLnBe+nwexZFMre3nplFIw4C3fT7BzKy6Kj189MPtWPdw4BxgqaTm1Pa3wDXAHEkTgdXA2Wneg8BIYBWwERi/Hds0M7PPoNLRR49v64oj4knKnycAGFGmfwAXbOt2zMys41Q6+uhdPj7puxvQFfhDROydV2FmZlZ9le4p7FU6LelM4JhcKjIzs5rZrrukRsS/Aid1cC1mZlZjlR4++nrJ5C4Ur1vwNQJmZjuZSkcfnV7yeTPwMsXbUpiZ2U6k0nMKHh5qZtYJVHr4qBH4CcVrDwJ4EpgSEYUca7MqW33FwFqXUDcOvnRprUswq4lKTzTfRvGK44Mo3qTu31KbmZntRCoNhR4RcVtEbE6vGUCPHOsyM7MaqDQU1kv6pqQu6fVNYEOehZmZWfVVGgoTgG8Av6P4fIOz8L2JzMx2OpUOSf174LyIeAuKz1kGfkQxLMzMbCdR6Z7CoNZAAIiINyk+Sc3MzHYilYbCLpL2a51IewqV7mWYmdkOotIf9h8D/ynpborXKXwDuCq3qszMrCYqvaJ5pqRFFG+CJ+DrEbEi18rMzKzqKj4ElELAQWBmthPbrltnm5nZzsmhYGZmGYeCmZllcgsFSbdKekPSspK2H0p6VVJzeo0smfc3klZJ+rWkk/Oqy8zM2pbnnsIM4JQy7dMiYkh6PQgg6QhgNDAgLXOTpC451mZmZmXkFgoR8QTwZoXdRwGzI+KDiHgJWAUck1dtZmZWXi3OKUyWtCQdXmq9SroXsKakTyG1fYqkSZIWSVq0bt26vGs1M+tUqh0KNwOHAEMo3m31x6ldZfpGuRVExPSIaIqIph49/EgHM7OOVNVQiIjXI6IlIj4CfsrHh4gKQO+Sro3Aa9WszczMqhwKknqWTH4NaB2ZNBcYLWl3SX2B/sDCatZmZmY53ulU0izgRKC7pAJwGXCipCEUDw29DHwbICKWS5pD8TYam4ELIqIlr9rMzKy83EIhIsaUab6lnf5X4TuvmpnVlJ+JYFanjvrBzFqXUDcWX3turUvoNHybCzMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMwsk1soSLpV0huSlpW0dZP0qKQX0/t+qV2SbpC0StISSUPzqsvMzNqW557CDOCULdqmAvMioj8wL00DnAr0T69JwM051mVmZm3ILRQi4gngzS2aRwG3p8+3A2eWtM+MomeAfSX1zKs2MzMrr9rnFA6MiLUA6f2A1N4LWFPSr5DazMysiurlRLPKtEXZjtIkSYskLVq3bl3OZZmZdS7VDoXXWw8Lpfc3UnsB6F3SrxF4rdwKImJ6RDRFRFOPHj1yLdbMrLOpdijMBc5Ln88D7i9pPzeNQhoGvN16mMnMzKpn17xWLGkWcCLQXVIBuAy4BpgjaSKwGjg7dX8QGAmsAjYC4/Oqy8zM2pZbKETEmDZmjSjTN4AL8qrFzMwqUy8nms3MrA7ktqdgZtZRVl8xsNYl1I2DL12a6/q9p2BmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZWryOE5JLwPvAi3A5ohoktQN+DnQB3gZ+EZEvFWL+szMOqta7il8OSKGRERTmp4KzIuI/sC8NG1mZlVUT4ePRgG3p8+3A2fWsBYzs06pVqEQwCOSFkualNoOjIi1AOn9gHILSpokaZGkRevWratSuWZmnUNNzikAwyPiNUkHAI9KeqHSBSNiOjAdoKmpKfIq0MysM6rJnkJEvJbe3wDuA44BXpfUEyC9v1GL2szMOrOqh4Kk/yFpr9bPwJ8Cy4C5wHmp23nA/dWuzcyss6vF4aMDgfsktW7/ZxHxsKRfAXMkTQRWA2fXoDYzs06t6qEQEb8FBpdp3wCMqHY9Zmb2sXoakmpmZjXmUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4xDwczMMg4FMzPL1F0oSDpF0q8lrZI0tdb1mJl1JnUVCpK6AP8MnAocAYyRdERtqzIz6zzqKhSAY4BVEfHbiNgEzAZG1bgmM7NOY9daF7CFXsCakukCcGxpB0mTgElp8j1Jv65SbTu9z0N3YH2t66gLl6nWFVgJ/9ss0TH/Nj/f1ox6C4Vy3zY+MRExHZhenXI6F0mLIqKp1nWYbcn/Nqun3g4fFYDeJdONwGs1qsXMrNOpt1D4FdBfUl9JuwGjgbk1rsnMrNOoq8NHEbFZ0mTgP4AuwK0RsbzGZXUmPixn9cr/NqtEEbH1XmZm1inU2+EjMzOrIYeCmZllHArmW4tY3ZJ0q6Q3JC2rdS2dhUOhk/OtRazOzQBOqXURnYlDwXxrEatbEfEE8Gat6+hMHApW7tYivWpUi5nVmEPBtnprETPrPBwK5luLmFnGoWC+tYiZZRwKnVxEbAZaby2yEpjjW4tYvZA0C3ga+IKkgqSJta5pZ+fbXJiZWcZ7CmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgVSfpEknLJS2R1CzpWEkvS+pepu9/bmVd96V1rJL0dvrcLOn4dtZ5Rnu3CJfUpx5u1SzpRElROjZf0pGp7aI0fYWkr2zHekv/rB5L7T9sXa91XnX1jGbb+Uk6DjgNGBoRH6Qf7d3a6h8Rx7e3voj4WlrvicBFEXFaybbaWmYuO85V20uBPwNuSdOjgedbZ0bEpdu53l+W/lmZtfKeglVbT2B9RHwAEBHrIyK715KkPSQ9LOkv0vR76f1ESQsk3S3pBUl3qq1f/U/6K0nPSVoq6U/SusZJujF9PjDtbTyfXp8IIUn9JP2XpKPTcvem+l6U9E8l/f5U0tNpW3dJ2jO1XyNpRdor+lFqO1vSsrS9J7ZS/2qgIdUpis8WeKhkuzMkndVB2yr93n8h6VdpuXskfS61HyLpmTTvita/H9t5OBSs2h4Bekv6jaSbJJ1QMm9P4N+An0XET8sseyTwXYoPA+oHDK9ge+sjYihwM1Du0MgNwOMRMRgYCmS3+JD0BeAeYHxE/Co1D6H4P/eBwJ9J6p32dv4O+Era1iLg+5K6AV8DBkTEIODKtI5LgZPTNs+o4DvcDZwNHA88B3ywZYft2Nb/Ljl8dEmZbd4bEUen5VYCrYewrgeuj4ij8Y0Td0oOBauqiHgPOAqYBKwDfi5pXJp9P3BbRMxsY/GFEVGIiI+AZqBPBZu8N70vbqP/SRQDg4hoiYi3U3uPVM83I6K5pP+8iHg7It4HVgCfB4ZRDKqnJDUD56X2d4D3gX+R9HVgY1rHU8CMtDfUpYLvMIdiKIwBZrXRZ1u39cuIGJJeV5VZ3/+S9EtJS4GxwIDUfhxwV/r8swpqtx2MQ8GqLv34LoiIyyjejO//pFlPAae2c1io9H/ILVR2Tqx1mUr7t3qb4sOHttwbKVeDgEdLfmSPiIiJ6WaDx1Dc2zgTeBggIv6S4p5Fb6BZ0v7tFRIRvwM+BL4KzGujT4dsq8QMYHJEDAQuBxoqXM52cA4FqypJX5DUv6RpCPBK+nwpsAG4qYolzQPOT7V1kbR3at9E8cf1XEl/vpV1PAMMl3RoWs/nJB2WzivsExEPUjzsNSTNPyQink0nidfzyedZtOVS4K8joqXczA7eFsBewFpJXSnuKZR+19YQH13humwH4tFHVm17Aj+RtC+wGVhF8VBS60iY7wK3SvqniLi4CvVMAaanYZ8tFANiLUBE/EHSacCjkv7Q1goiYl06BDZL0u6p+e+Ad4H7JTVQ3Jv4Xpp3bQpGUQyl59mKiGh3aC7FH/FKt3VC+VV8wv8FnqUY2EvT+qH49/P/JF0I/DvFPSrbifjW2WZWsTQK6Y8REZJGA2MiYlSt67KO4z0FM9sWRwE3pvM+vwcm1Lge62DeUzCrMUknA/+4RfNLrRfmmVWTQ8HMzDIefWRmZhmHgpmZZRwKZmaWcSiYmVnm/wO2gGJfUciX0wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "sn.countplot(x='SkinThickness_MissFlag', hue='Outcome', data=trainDatas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pregnancies                 0\n",
       "Glucose                     0\n",
       "BloodPressure               0\n",
       "SkinThickness               0\n",
       "Insulin                     0\n",
       "BMI                         0\n",
       "DiabetesPedigreeFunction    0\n",
       "Age                         0\n",
       "Outcome                     0\n",
       "SkinThickness_MissFlag      0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用中值填充缺失数据\n",
    "medians = trainDatas.median() \n",
    "trainDatas = trainDatas.fillna(medians)\n",
    "trainDatas.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 数据标准化(归一化)处理\n",
    "y_train = trainDatas['Outcome']\n",
    "X_train = trainDatas.drop(['Outcome'], axis=1)\n",
    "\n",
    "# 保存特征字段名称\n",
    "feat_names = X_train.columns\n",
    "\n",
    "# 导入标准化工具包\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# 初始化标准器实例\n",
    "ss_X = StandardScaler()\n",
    "# 对数据的特征进行标准化处理\n",
    "X_train = ss_X.fit_transform(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 特征值X_train处理，构造2维表格\n",
    "X_train = pd.DataFrame(columns=feat_names, data=X_train)\n",
    "# 输出变量y_train处理，将y_train加入到2维表格X_train\n",
    "trainDatas = pd.concat([X_train, y_train], axis=1)\n",
    "# 处理结果保存到csv文件\n",
    "trainDatas.to_csv(r'C:\\Users\\HuangSX\\Desktop\\logistic\\Train_datas.csv', index=False, header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pregnancies</th>\n",
       "      <th>Glucose</th>\n",
       "      <th>BloodPressure</th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>Insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>DiabetesPedigreeFunction</th>\n",
       "      <th>Age</th>\n",
       "      <th>SkinThickness_MissFlag</th>\n",
       "      <th>Outcome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.639947</td>\n",
       "      <td>0.866045</td>\n",
       "      <td>-0.031990</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>0.166619</td>\n",
       "      <td>0.468492</td>\n",
       "      <td>1.425995</td>\n",
       "      <td>-0.647760</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.205066</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.852200</td>\n",
       "      <td>-0.365061</td>\n",
       "      <td>-0.190672</td>\n",
       "      <td>-0.647760</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.233880</td>\n",
       "      <td>2.016662</td>\n",
       "      <td>-0.693761</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-1.332500</td>\n",
       "      <td>0.604397</td>\n",
       "      <td>-0.105584</td>\n",
       "      <td>1.543781</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.073567</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.695245</td>\n",
       "      <td>-0.540642</td>\n",
       "      <td>-0.633881</td>\n",
       "      <td>-0.920763</td>\n",
       "      <td>-1.041549</td>\n",
       "      <td>-0.647760</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.141852</td>\n",
       "      <td>0.504422</td>\n",
       "      <td>-2.679076</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>0.316566</td>\n",
       "      <td>1.549303</td>\n",
       "      <td>5.484909</td>\n",
       "      <td>-0.020496</td>\n",
       "      <td>-0.647760</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.342981</td>\n",
       "      <td>-0.185948</td>\n",
       "      <td>0.133453</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.997745</td>\n",
       "      <td>-0.818079</td>\n",
       "      <td>-0.275760</td>\n",
       "      <td>1.543781</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-0.250952</td>\n",
       "      <td>-1.435189</td>\n",
       "      <td>-1.851862</td>\n",
       "      <td>0.329171</td>\n",
       "      <td>-0.610145</td>\n",
       "      <td>-0.211799</td>\n",
       "      <td>-0.676133</td>\n",
       "      <td>-0.616111</td>\n",
       "      <td>-0.647760</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1.827813</td>\n",
       "      <td>-0.218823</td>\n",
       "      <td>-0.031990</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>0.414047</td>\n",
       "      <td>-1.020427</td>\n",
       "      <td>-0.360847</td>\n",
       "      <td>1.543781</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>-0.547919</td>\n",
       "      <td>2.476909</td>\n",
       "      <td>-0.197433</td>\n",
       "      <td>1.808882</td>\n",
       "      <td>4.660524</td>\n",
       "      <td>-0.284572</td>\n",
       "      <td>-0.947944</td>\n",
       "      <td>1.681259</td>\n",
       "      <td>-0.647760</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1.233880</td>\n",
       "      <td>0.109925</td>\n",
       "      <td>1.953325</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.022590</td>\n",
       "      <td>-0.724455</td>\n",
       "      <td>1.766346</td>\n",
       "      <td>1.543781</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \\\n",
       "0     0.639947  0.866045      -0.031990       0.670643 -0.181541  0.166619   \n",
       "1    -0.844885 -1.205066      -0.528319      -0.012301 -0.181541 -0.852200   \n",
       "2     1.233880  2.016662      -0.693761      -0.012301 -0.181541 -1.332500   \n",
       "3    -0.844885 -1.073567      -0.528319      -0.695245 -0.540642 -0.633881   \n",
       "4    -1.141852  0.504422      -2.679076       0.670643  0.316566  1.549303   \n",
       "5     0.342981 -0.185948       0.133453      -0.012301 -0.181541 -0.997745   \n",
       "6    -0.250952 -1.435189      -1.851862       0.329171 -0.610145 -0.211799   \n",
       "7     1.827813 -0.218823      -0.031990      -0.012301 -0.181541  0.414047   \n",
       "8    -0.547919  2.476909      -0.197433       1.808882  4.660524 -0.284572   \n",
       "9     1.233880  0.109925       1.953325      -0.012301 -0.181541 -0.022590   \n",
       "\n",
       "   DiabetesPedigreeFunction       Age  SkinThickness_MissFlag  Outcome  \n",
       "0                  0.468492  1.425995               -0.647760        1  \n",
       "1                 -0.365061 -0.190672               -0.647760        0  \n",
       "2                  0.604397 -0.105584                1.543781        1  \n",
       "3                 -0.920763 -1.041549               -0.647760        0  \n",
       "4                  5.484909 -0.020496               -0.647760        1  \n",
       "5                 -0.818079 -0.275760                1.543781        0  \n",
       "6                 -0.676133 -0.616111               -0.647760        1  \n",
       "7                 -1.020427 -0.360847                1.543781        0  \n",
       "8                 -0.947944  1.681259               -0.647760        1  \n",
       "9                 -0.724455  1.766346                1.543781        1  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 显示处理后的前10行数据\n",
    "trainDatas.head(10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
